#!/usr/bin/env python
# -*- coding: UTF-8 -*-

"""Build balanced, binarised train/dev/test splits from the douban corpus.

Reads ``douban.data`` (one sentence per line) and ``douban.label`` (one raw
rating per line) in lockstep, keeps a fixed number of examples for each of
the ratings 20/40/80/100, maps them to the binary labels 0/1, shuffles each
split and writes the ``train2``/``dev2``/``test2`` ``.data``/``.label`` pairs.
"""

import numpy

# Number of examples kept *per rating* for each split.
train_size = 2500
dev_size = 500
test_size = 1000
limit = train_size + test_size + dev_size

# Raw rating (as found in douban.label) -> label line written to *.label.
# Ratings missing from this table are skipped entirely.
RATING_TO_LABEL = {
    '20': '0\n',
    '40': '0\n',
    '80': '1\n',
    '100': '1\n',
}


def split_by_rating(sentences, ratings,
                    n_train=train_size, n_dev=dev_size, n_test=test_size):
    """Distribute (sentence, label) pairs over train/dev/test splits.

    For every rating listed in ``RATING_TO_LABEL`` the first ``n_train``
    occurrences go to train, the next ``n_dev`` to dev and the next
    ``n_test`` to test; later occurrences are dropped.  Input order is
    preserved inside each split.

    Args:
        sentences: iterable of sentence lines (trailing newline kept as-is).
        ratings: iterable of raw rating lines, parallel to ``sentences``;
            surrounding whitespace is stripped before the lookup.
        n_train: per-rating quota for the train split.
        n_dev: per-rating quota for the dev split.
        n_test: per-rating quota for the test split.

    Returns:
        A ``(train, dev, test)`` tuple of lists of ``(sentence, label)``
        tuples, where ``label`` is the mapped label line (``'0\\n'`` or
        ``'1\\n'``).
    """
    dev_end = n_train + n_dev
    quota = dev_end + n_test
    seen_per_rating = dict.fromkeys(RATING_TO_LABEL, 0)
    # Total number of slots still open across all ratings; once it reaches
    # zero nothing further can be accepted, so the scan can stop early.
    remaining = quota * len(seen_per_rating)

    train, dev, test = [], [], []
    for sentence, rating in zip(sentences, ratings):
        if remaining <= 0:
            break
        key = rating.strip()
        label = RATING_TO_LABEL.get(key)
        if label is None:
            continue  # rating we do not use
        seen = seen_per_rating[key]
        if seen >= quota:
            continue  # this rating already filled all three splits
        if seen < n_train:
            train.append((sentence, label))
        elif seen < dev_end:
            dev.append((sentence, label))
        else:
            test.append((sentence, label))
        seen_per_rating[key] = seen + 1
        remaining -= 1
    return train, dev, test


def write_pairs(pairs, data_path, label_path):
    """Write sentences to ``data_path`` and their labels to ``label_path``.

    Both files are truncated/created and are closed even if a write fails.
    Sentences and labels are written verbatim, so each is expected to carry
    its own line terminator.
    """
    with open(data_path, 'w') as data_out, open(label_path, 'w') as label_out:
        for sentence, label in pairs:
            data_out.write(sentence)
            label_out.write(label)


def main():
    """Run the full read -> split -> shuffle -> write pipeline."""
    # Context managers guarantee the input handles are released, which the
    # original script never did.
    with open('douban.data') as data_in, open('douban.label') as label_in:
        train, dev, test = split_by_rating(data_in, label_in)

    # Shuffle in the same order as before (train, dev, test) so a seeded
    # numpy RNG yields the same permutations.
    numpy.random.shuffle(train)
    numpy.random.shuffle(dev)
    numpy.random.shuffle(test)

    write_pairs(train, 'train2.data', 'train2.label')
    write_pairs(dev, 'dev2.data', 'dev2.label')
    write_pairs(test, 'test2.data', 'test2.label')


if __name__ == '__main__':
    main()

